from evalscope import TaskConfig, run_task
from evalscope.constants import EvalType

# Evaluate a model served behind an API endpoint on a single data collection.
# Everything below is plain configuration; the work happens in run_task().

# The collection name is used both in `datasets` and as the key of `dataset_args`.
collection_name = 'data_collection'

# Per-dataset options: where the collection's samples are read from.
collection_args = {
    collection_name: {'dataset_id': 'test_math500.jsonl'},
}

# Sampling parameters passed as the generation config.
sampling_params = dict(
    max_tokens=20000,  # large budget to avoid exceeding the max length
    temperature=0.6,
    top_p=0.95,
    n=1,  # number of repeats per prompt (note: lmdeploy only supports n=1)
)

task_cfg = TaskConfig(
    model='deepseek-r1',
    api_url='http://0.0.0.0:12345/v1/completions',
    api_key='EMPTY',
    eval_type=EvalType.SERVICE,
    datasets=[collection_name],
    dataset_args=collection_args,
    eval_batch_size=16,  # number of workers used to send requests
    generation_config=sampling_params,
)

run_task(task_cfg=task_cfg)
